# Author Hussain Abbas
# Copyright © Stats AI 2021. All Rights Reserved
import tensorflow as tf
import autokeras as ak
from tensorflow.keras import backend as K
import keras_tuner
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import re
from sklearn.model_selection import cross_val_score, KFold, train_test_split
from sklearn.metrics import roc_auc_score, precision_score, recall_score, fbeta_score, roc_curve
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from tqdm import tqdm
# Verify GPU is detected and working
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
Num GPUs Available: 1
TRAIN_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/train.csv"
TEST_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/eval.csv"
#datasets located in C:/Users/USER/.keras/datasets
train_file_path = tf.keras.utils.get_file("train.csv", TRAIN_DATA_URL)
test_file_path = tf.keras.utils.get_file("eval.csv", TEST_DATA_URL)
train_data = pd.read_csv(train_file_path)
test_data = pd.read_csv(test_file_path)
df = pd.concat([train_data, test_data])
df.drop_duplicates(inplace=True)
df = df.reset_index()
df = df.drop(['index'], axis=1)
print('All Data Summary')
print(df.describe())
print('\n')
print('Train Data Summary')
print(train_data.describe())
print('\n')
print('Test Data Summary')
print(test_data.describe())
All Data Summary
survived age n_siblings_spouses parch fare
count 781.000000 781.000000 781.000000 781.000000 781.000000
mean 0.413572 29.622817 0.524968 0.417414 34.750464
std 0.492789 13.764671 0.987592 0.838132 52.237906
min 0.000000 0.420000 0.000000 0.000000 0.000000
25% 0.000000 22.000000 0.000000 0.000000 8.050000
50% 0.000000 28.000000 0.000000 0.000000 15.900000
75% 1.000000 36.000000 1.000000 1.000000 34.020800
max 1.000000 80.000000 8.000000 6.000000 512.329200
Train Data Summary
survived age n_siblings_spouses parch fare
count 627.000000 627.000000 627.000000 627.000000 627.000000
mean 0.387560 29.631308 0.545455 0.379585 34.385399
std 0.487582 12.511818 1.151090 0.792999 54.597730
min 0.000000 0.750000 0.000000 0.000000 0.000000
25% 0.000000 23.000000 0.000000 0.000000 7.895800
50% 0.000000 28.000000 0.000000 0.000000 15.045800
75% 1.000000 35.000000 1.000000 0.000000 31.387500
max 1.000000 80.000000 8.000000 5.000000 512.329200
Test Data Summary
survived age n_siblings_spouses parch fare
count 264.000000 264.000000 264.000000 264.000000 264.000000
mean 0.375000 28.720985 0.469697 0.386364 27.023880
std 0.485042 14.157538 0.978393 0.837775 34.973108
min 0.000000 0.420000 0.000000 0.000000 0.000000
25% 0.000000 21.000000 0.000000 0.000000 7.925000
50% 0.000000 28.000000 0.000000 0.000000 13.250000
75% 1.000000 35.250000 1.000000 0.000000 27.900000
max 1.000000 74.000000 8.000000 6.000000 263.000000
print('Train Data')
train_data.head()
Train Data
| | survived | sex | age | n_siblings_spouses | parch | fare | class | deck | embark_town | alone |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | male | 22.0 | 1 | 0 | 7.2500 | Third | unknown | Southampton | n |
| 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | First | C | Cherbourg | n |
| 2 | 1 | female | 26.0 | 0 | 0 | 7.9250 | Third | unknown | Southampton | y |
| 3 | 1 | female | 35.0 | 1 | 0 | 53.1000 | First | C | Southampton | n |
| 4 | 0 | male | 28.0 | 0 | 0 | 8.4583 | Third | unknown | Queenstown | y |
print('Test Data')
test_data.head()
Test Data
| | survived | sex | age | n_siblings_spouses | parch | fare | class | deck | embark_town | alone |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | male | 35.0 | 0 | 0 | 8.0500 | Third | unknown | Southampton | y |
| 1 | 0 | male | 54.0 | 0 | 0 | 51.8625 | First | E | Southampton | y |
| 2 | 1 | female | 58.0 | 0 | 0 | 26.5500 | First | C | Southampton | y |
| 3 | 1 | female | 55.0 | 0 | 0 | 16.0000 | Second | unknown | Southampton | y |
| 4 | 1 | male | 34.0 | 0 | 0 | 13.0000 | Second | D | Southampton | y |
def recall_m(y_true, y_pred):
true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
recall = true_positives / (possible_positives + K.epsilon())
return recall
def precision_m(y_true, y_pred):
true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
precision = true_positives / (predicted_positives + K.epsilon())
return precision
def f_beta_score(y_true, y_pred):
a = 0.5 ** 2
b = 1 + a
precision = precision_m(y_true, y_pred)
recall = recall_m(y_true, y_pred)
return b*((precision*recall)/(a*precision+recall+K.epsilon()))
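# Quick sanity check (toy labels, not from the notebook run): for hard 0/1
# predictions the Keras-backend metric above should agree with sklearn's
# fbeta_score at beta = 0.5.
y_true_toy = tf.constant([1., 0., 1., 1., 0., 1.])
y_pred_toy = tf.constant([1., 0., 0., 1., 1., 1.])
print(f_beta_score(y_true_toy, y_pred_toy).numpy())                    # ~0.75
print(fbeta_score([1, 0, 1, 1, 0, 1], [1, 0, 0, 1, 1, 1], beta=0.5))   # 0.75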
def ak_predict(model, data):
pred_input = data.astype(np.compat.unicode)
predicted = model.predict(pred_input).flatten()
pred_result = predicted
#cut_off = 0.5
#pred_result = [1 if x > cut_off else 0 for x in predicted]
return pred_result
def jdl(y_true, y_pred, smooth=100):
"""
Jaccard = (|X & Y|)/ (|X|+ |Y| - |X & Y|)
= sum(|A*B|)/(sum(|A|)+sum(|B|)-sum(|A*B|))
The Jaccard distance loss is useful for unbalanced datasets. It has been
shifted so it converges on 0 and is smoothed to avoid exploding or vanishing
gradients.
Ref: https://en.wikipedia.org/wiki/Jaccard_index
@url: https://gist.github.com/wassname/f1452b748efcbeb4cb9b1d059dce6f96
@author: wassname
"""
intersection = K.sum(K.abs(y_true * y_pred), axis=-1)
sum_ = K.sum(K.abs(y_true) + K.abs(y_pred), axis=-1)
jac = (intersection + smooth) / (sum_ - intersection + smooth)
return (1 - jac) * smooth
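# Toy illustration (not part of the original run): identical inputs give a
# Jaccard distance loss of 0, while disagreement pushes the loss above 0.
yt = tf.constant([[1., 0., 1., 1.]])
print(jdl(yt, yt).numpy())                               # [0.]
print(jdl(yt, tf.constant([[0., 1., 0., 0.]])).numpy())  # ~[3.85]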
from tensorflow.keras.utils import CustomObjectScope
from sklearn.utils import class_weight
with CustomObjectScope({'f_beta_score': f_beta_score,
'jdl': jdl, }):
results = []
# number of times we partition the data into training/test set
outer_loop_folds = 1
# number of times we partition the training data into training/validation set
inner_loop_folds = 1
# max_trials: defaults to 100; the maximum number of different models for the tuner to try
num_trials = 20
# epochs: if unspecified, AutoKeras defaults to 1000 epochs with early stopping (patience=30)
Early_Stopping = tf.keras.callbacks.EarlyStopping(monitor='val_f_beta_score', patience=101)
for j in tqdm(range(outer_loop_folds)):
#Randomly split df into 80% train, 20% test
x_train, x_test, y_train, y_test = train_test_split(df.drop('survived', axis=1),
df.survived, test_size=0.2,
stratify = df.survived)
for i in tqdm(range(inner_loop_folds)):
# Further randomly split the 80% train into 64% train and 16% validation
x_inner_train, x_inner_val, y_inner_train, y_inner_val = train_test_split(x_train,
y_train, test_size=0.2,
stratify = y_train)
w = y_inner_train.value_counts(normalize = True)[0]/y_inner_train.value_counts(normalize = True)[1]
cw = {0: 1., 1: w}
#cw = {0: 1., 1: 0.5}
# Try max_trial different models
clf = ak.StructuredDataClassifier(
overwrite=True,
max_trials = num_trials,
#tuner = 'random',
#tuner = 'hyperband',
tuner = 'bayesian',
metrics=[jdl,
'binary_crossentropy',
tf.keras.metrics.AUC(name='auc'),
tf.keras.metrics.BinaryAccuracy(name='accuracy'),
tf.keras.metrics.Precision(name='precision'),
tf.keras.metrics.Recall(name='recall'),
f_beta_score],
objective=keras_tuner.Objective('val_f_beta_score', direction='max'),
#objective=keras_tuner.Objective('val_jdl', direction='min'),
#loss = jdl,
)
try:
# Fit the best model
clf.fit(x_inner_train, y_inner_train,
validation_data = (x_inner_val, y_inner_val),
#class_weight = cw
epochs = 3000,
callbacks = [Early_Stopping]
)
# Predict with the best model
x = clf.evaluate(x_test, y_test)
x_test_loss, x_jdl, x_bc, x_auc, x_accuracy, x_precision, x_recall, x_f_beta_score= x
# Save the results
model_name = 'model_autokeras_' + str(j) + '_'+ str(i)
results.append([model_name, j, i,
x_test_loss, x_jdl, x_bc,
x_auc, x_accuracy,
x_precision, x_recall,
x_f_beta_score])
except:
print("Issue training model")
try:
# Save the model after each j, i iteration
model = clf.export_model()
model.save(model_name, save_format="tf")
except:
print("Issue saving model")
results = pd.DataFrame(results, columns = ['model_name', 'j', 'i', 'Test_loss','Loss:JDL', 'Loss:Binary Cross Entropy',
'AUC', 'Accuracy', 'Precision', 'Recall', 'F_Beta_Score'])
Trial 20 Complete [00h 00m 27s]
val_f_beta_score: 0.8885974287986755

Best val_f_beta_score So Far: 0.9095396399497986
Total elapsed time: 00h 11m 39s
INFO:tensorflow:Oracle triggered exit
WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.
INFO:tensorflow:Assets written to: .\structured_data_classifier\best_model\assets
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.iter
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.beta_1
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.beta_2
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.decay
WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.learning_rate
WARNING:tensorflow:A checkpoint was restored (e.g. tf.train.Checkpoint.restore or tf.keras.Model.load_weights) but not all checkpointed values were used. See above for specific issues. Use expect_partial() on the load status object, e.g. tf.train.Checkpoint.restore(...).expect_partial(), to silence these warnings, or use assert_consumed() to make the check explicit. See https://www.tensorflow.org/guide/checkpoint#loading_mechanics for details.
5/5 [==============================] - 1s 7ms/step - loss: 0.5867 - jdl: 0.4320 - binary_crossentropy: 0.5867 - auc: 0.8290 - accuracy: 0.7707 - precision: 0.7736 - recall: 0.6308 - f_beta_score: 0.7413
INFO:tensorflow:Assets written to: model_autokeras_0_0\assets
100%|███████████████████████████████████████████████████████████████████████████████████| 1/1 [11:50<00:00, 710.33s/it]
100%|███████████████████████████████████████████████████████████████████████████████████| 1/1 [11:50<00:00, 710.33s/it]
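# Sketch only (not executed here; names like X_all / outer_cv are illustrative):
# the outer/inner loops above use a single random split each. One way to turn
# them into a true nested resampling scheme is stratified K-fold splitting,
# with the class weights derived via sklearn's helper (imported above as
# `class_weight`) instead of the manual ratio.
from sklearn.model_selection import StratifiedKFold

X_all, y_all = df.drop('survived', axis=1), df.survived
outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
for train_idx, test_idx in outer_cv.split(X_all, y_all):
    x_tr, y_tr = X_all.iloc[train_idx], y_all.iloc[train_idx]
    weights = class_weight.compute_class_weight(class_weight='balanced',
                                                classes=np.unique(y_tr), y=y_tr)
    cw = dict(enumerate(weights))
    # ... fit an ak.StructuredDataClassifier on (x_tr, y_tr) as in the loop
    # above, evaluate on X_all.iloc[test_idx], and aggregate the fold scores.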
%%capture cap --no-stderr
clf.tuner.results_summary()
with open('model_val_info.txt', 'w') as f:
f.write(str(cap))
z = open('model_val_info.txt').read()
z = re.findall(r'Score: ([^/]+)', z)
z = np.array([x.split()[0] for x in z]).astype(float)
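# Alternative (sketch, relying on keras-tuner's Oracle API): read the best
# validation objective straight from the tuner instead of parsing the captured
# results_summary() text.
best_trial = clf.tuner.oracle.get_best_trials(num_trials=1)[0]
print(best_trial.score)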
results.describe()
| | j | i | Test_loss | Loss:JDL | Loss:Binary Cross Entropy | AUC | Accuracy | Precision | Recall | F_Beta_Score |
|---|---|---|---|---|---|---|---|---|---|---|
| count | 1.0 | 1.0 | 1.000000 | 1.00000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
| mean | 0.0 | 0.0 | 0.586716 | 0.43196 | 0.586716 | 0.829013 | 0.770701 | 0.773585 | 0.630769 | 0.741339 |
| std | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| min | 0.0 | 0.0 | 0.586716 | 0.43196 | 0.586716 | 0.829013 | 0.770701 | 0.773585 | 0.630769 | 0.741339 |
| 25% | 0.0 | 0.0 | 0.586716 | 0.43196 | 0.586716 | 0.829013 | 0.770701 | 0.773585 | 0.630769 | 0.741339 |
| 50% | 0.0 | 0.0 | 0.586716 | 0.43196 | 0.586716 | 0.829013 | 0.770701 | 0.773585 | 0.630769 | 0.741339 |
| 75% | 0.0 | 0.0 | 0.586716 | 0.43196 | 0.586716 | 0.829013 | 0.770701 | 0.773585 | 0.630769 | 0.741339 |
| max | 0.0 | 0.0 | 0.586716 | 0.43196 | 0.586716 | 0.829013 | 0.770701 | 0.773585 | 0.630769 | 0.741339 |
results['F1_Beta_Val'] = z.max()
results
| | model_name | j | i | Test_loss | Loss:JDL | Loss:Binary Cross Entropy | AUC | Accuracy | Precision | Recall | F_Beta_Score | F1_Beta_Val |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | model_autokeras_0_0 | 0 | 0 | 0.586716 | 0.43196 | 0.586716 | 0.829013 | 0.770701 | 0.773585 | 0.630769 | 0.741339 | 0.90954 |
#best_model = results.loc[np.argmax(results.test_accuracy)].model_name
best_model = results.loc[np.argmax(results.F_Beta_Score)].model_name
best_model
'model_autokeras_0_0'
from tensorflow.keras.models import load_model
my_custom_objects={'f_beta_score': f_beta_score,
'jdl': jdl, }
my_custom_objects.update(ak.CUSTOM_OBJECTS)
model_ak = load_model(best_model, custom_objects=my_custom_objects)
model_ak.summary()
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
input_1 (InputLayer)         [(None, 9)]               0
_________________________________________________________________
multi_category_encoding (Mul (None, 9)                 0
_________________________________________________________________
dense (Dense)                (None, 1024)              10240
_________________________________________________________________
batch_normalization (BatchNo (None, 1024)              4096
_________________________________________________________________
re_lu (ReLU)                 (None, 1024)              0
_________________________________________________________________
dropout (Dropout)            (None, 1024)              0
_________________________________________________________________
dropout_1 (Dropout)          (None, 1024)              0
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 1025
_________________________________________________________________
classification_head_1 (Activ (None, 1)                 0
=================================================================
Total params: 15,361
Trainable params: 13,313
Non-trainable params: 2,048
_________________________________________________________________
# type: pandas.core.frame.DataFrame
pred_input = x_test.astype(np.compat.unicode)
# type: numpy.ndarray
predicted = model_ak.predict(pred_input).flatten()
cut_off = 0.5
pred_result = [1 if x > cut_off else 0 for x in predicted]
pred_result = np.array(pred_result)
actual = y_test.to_numpy()
actual = actual.flatten()
cm = tf.math.confusion_matrix(actual, pred_result)
cm = cm/cm.numpy().sum(axis=1)[:, tf.newaxis]
sns.heatmap(
cm, annot=True,
xticklabels=['no', 'yes'],
yticklabels=['no', 'yes'])
plt.xlabel("Predicted")
plt.ylabel("True")
'''
https://towardsdatascience.com/accuracy-precision-recall-or-f1-331fb37c5cb9
https://stackoverflow.com/questions/44172162/f1-score-vs-roc-auc
- Maximize Precision when False Positives are of concern
- Maximize Recall when False Negatives are of concern
- Maximize F1 Score when both are important and classes are unbalanced
'''
auc_score = roc_auc_score(actual, pred_result)
precision = precision_score(actual, pred_result)
recall = recall_score(actual, pred_result)
f_beta = fbeta_score(actual, pred_result, beta = 0.5)
print("Cut-Off:", cut_off)
print("ROC-AUC-Score:", auc_score)
print('Precision: ' + str(precision))
print('Recall: ' + str(recall))
print('F_Beta: ' + str(f_beta))
y_test_classes = list(set(y_test))
# print Confusion Matrix from Sklearn
cm = confusion_matrix(actual, pred_result, labels = y_test_classes)
#cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels = y_test_classes)
disp.plot();
Cut-Off: 0.5
ROC-AUC-Score: 0.7501672240802675
Precision: 0.7735849056603774
Recall: 0.6307692307692307
F_Beta: 0.740072202166065
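# Optional follow-up (sketch): instead of fixing cut_off at 0.5, sweep a grid of
# thresholds over the held-out probabilities computed above and keep the one
# that maximizes the F-beta(0.5) score.
cut_offs = np.linspace(0.05, 0.95, 19)
f_scores = [fbeta_score(actual, (predicted > c).astype(int), beta=0.5) for c in cut_offs]
print("Best cut-off:", cut_offs[int(np.argmax(f_scores))], "F_Beta:", max(f_scores))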
# compute true positive rate and false positive rate
false_positive_rate, true_positive_rate, thresholds = roc_curve(actual, pred_result)
# plotting them against each other
def plot_roc_curve(false_positive_rate, true_positive_rate, label=None):
plt.plot(false_positive_rate, true_positive_rate, linewidth=2, label=label)
plt.plot([0, 1], [0, 1], 'r', linewidth=4)
plt.axis([0, 1, 0, 1])
plt.xlabel('False Positive Rate (FPR)', fontsize=16)
plt.ylabel('True Positive Rate (TPR)', fontsize=16)
plt.figure(figsize=(14, 7))
plot_roc_curve(false_positive_rate, true_positive_rate)
plt.show()
# https://towardsdatascience.com/predicting-the-survival-of-titanic-passengers-30870ccc7e8
# https://stackoverflow.com/questions/44172162/f1-score-vs-roc-auc
# Naive Random Coin Flip Classifier Performance
predicted = np.random.randint(0,2, size = len(y_test))
pred_result = predicted.flatten()
actual = y_test.to_numpy()
actual = actual.flatten()
cm = tf.math.confusion_matrix(actual, pred_result)
cm = cm/cm.numpy().sum(axis=1)[:, tf.newaxis]
sns.heatmap(
cm, annot=True,
xticklabels=['no', 'yes'],
yticklabels=['no', 'yes'])
plt.xlabel("Predicted")
plt.ylabel("True")
#true_positives = tf.math.count_nonzero(pred_result * actual)
#true_negatives = tf.math.count_nonzero((pred_result - 1) * (actual - 1))
#false_positives = tf.math.count_nonzero(pred_result * (actual - 1))
#false_negatives = tf.math.count_nonzero((pred_result - 1) * actual)
#precision = true_positives / (true_positives + false_positives)
#recall = true_positives / (true_positives + false_negatives)
#f1 = 2 * precision * recall / (precision + recall)
#print("Precision: " + str(np.array(precision).flatten()[0]))
#print("Recall: " + str(np.array(recall).flatten()[0]))
#print("F1: " + str(np.array(f1).flatten()[0]))
#print('')
'''
https://towardsdatascience.com/accuracy-precision-recall-or-f1-331fb37c5cb9
https://stackoverflow.com/questions/44172162/f1-score-vs-roc-auc
- Maximize Precision when False Positives are of concern
- Maximize Recall when False Negatives are of concern
- Maximize F1 Score when both are important and classes are unbalanced
'''
auc_score = roc_auc_score(actual, pred_result)
precision = precision_score(actual, pred_result)
recall = recall_score(actual, pred_result)
f_beta = fbeta_score(actual, pred_result, beta = 1)
print("ROC-AUC-Score:", auc_score)
print('Precision: ' + str(precision))
print('Recall: ' + str(recall))
print('F_Beta: ' + str(f_beta))
y_test_classes = list(set(y_test))
# print Confusion Matrix from Sklearn
cm = confusion_matrix(actual, pred_result, labels = y_test_classes)
#cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels = y_test_classes)
disp.plot();
ROC-AUC-Score: 0.5147157190635451
Precision: 0.42857142857142855
Recall: 0.5076923076923077
F_Beta: 0.46478873239436613
import dalex as dx
X, y = df.drop('survived', axis=1), df.survived
n, p = X.shape
explainer_keras = dx.Explainer(model_ak,
data = X,
y = y,
predict_function = ak_predict,
label = 'autokeras',
#predict_function = dx._explainer.yhat.yhat_tf_classification,
model_type = 'classification'
)
Preparation of a new explainer is initiated
  -> data              : 781 rows 9 cols
  -> target variable   : Parameter 'y' was a pandas.Series. Converted to a numpy.ndarray.
  -> target variable   : 781 values
  -> model_class       : tensorflow.python.keras.engine.functional.Functional (default)
  -> label             : autokeras
  -> predict function  : <function ak_predict at 0x000001D1EDEE6F70> will be used
  -> predict function  : Accepts pandas.DataFrame and numpy.ndarray.
  -> predicted values  : min = 0.134, mean = 0.458, max = 1.0
  -> model type        : classification will be used
  -> residual function : difference between y and yhat (default)
  -> residuals         : min = -0.782, mean = -0.0446, max = 0.758
  -> model_info        : package tensorflow

A new explainer has been created!
explainer_keras.model_performance()
| | recall | precision | f1 | accuracy | auc |
|---|---|---|---|---|---|
| autokeras | 0.594427 | 0.803347 | 0.683274 | 0.772087 | 0.827971 |
explainer_keras.model_diagnostics().result
| | sex | age | n_siblings_spouses | parch | fare | class | deck | embark_town | alone | y | y_hat | residuals | abs_residuals | label | ids |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | male | 22.0 | 1 | 0 | 7.2500 | Third | unknown | Southampton | n | 0 | 0.334225 | -0.334225 | 0.334225 | autokeras | 1 |
| 1 | female | 38.0 | 1 | 0 | 71.2833 | First | C | Cherbourg | n | 1 | 0.511104 | 0.488896 | 0.488896 | autokeras | 2 |
| 2 | female | 26.0 | 0 | 0 | 7.9250 | Third | unknown | Southampton | y | 1 | 0.467287 | 0.532713 | 0.532713 | autokeras | 3 |
| 3 | female | 35.0 | 1 | 0 | 53.1000 | First | C | Southampton | n | 1 | 0.508692 | 0.491308 | 0.491308 | autokeras | 4 |
| 4 | male | 28.0 | 0 | 0 | 8.4583 | Third | unknown | Queenstown | y | 0 | 0.419327 | -0.419327 | 0.419327 | autokeras | 5 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 776 | female | 56.0 | 0 | 1 | 83.1583 | First | C | Cherbourg | n | 1 | 0.498832 | 0.501168 | 0.501168 | autokeras | 777 |
| 777 | female | 25.0 | 0 | 1 | 26.0000 | Second | unknown | Southampton | n | 1 | 0.571394 | 0.428606 | 0.428606 | autokeras | 778 |
| 778 | male | 33.0 | 0 | 0 | 7.8958 | Third | unknown | Southampton | y | 0 | 0.369783 | -0.369783 | 0.369783 | autokeras | 779 |
| 779 | female | 39.0 | 0 | 5 | 29.1250 | Third | unknown | Queenstown | n | 0 | 0.501495 | -0.501495 | 0.501495 | autokeras | 780 |
| 780 | male | 26.0 | 0 | 0 | 30.0000 | First | C | Cherbourg | y | 1 | 0.463082 | 0.536918 | 0.536918 | autokeras | 781 |
781 rows × 15 columns
explainer_keras.model_parts().plot()
explainer_keras.model_profile().plot(variables=['sex', 'class', 'fare', 'deck',
'n_siblings_spouses', 'embark_town',
'parch', 'alone', 'age'])
Calculating ceteris paribus: 100%|███████████████████████████████████████████████████████| 9/9 [00:07<00:00, 1.14it/s]
explainer_keras.predict_parts(X.loc[0], type='shap').plot()
explainer_keras.predict_parts(X.loc[1], type='shap').plot()
X_one_hot = pd.get_dummies(X, drop_first=True)
X_one_hot
| | age | n_siblings_spouses | parch | fare | sex_male | class_Second | class_Third | deck_B | deck_C | deck_D | deck_E | deck_F | deck_G | deck_unknown | embark_town_Queenstown | embark_town_Southampton | embark_town_unknown | alone_y |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 22.0 | 1 | 0 | 7.2500 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 |
| 1 | 38.0 | 1 | 0 | 71.2833 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 26.0 | 0 | 0 | 7.9250 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 1 |
| 3 | 35.0 | 1 | 0 | 53.1000 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
| 4 | 28.0 | 0 | 0 | 8.4583 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 776 | 56.0 | 0 | 1 | 83.1583 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 777 | 25.0 | 0 | 1 | 26.0000 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 |
| 778 | 33.0 | 0 | 0 | 7.8958 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 1 |
| 779 | 39.0 | 0 | 5 | 29.1250 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 |
| 780 | 26.0 | 0 | 0 | 30.0000 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
781 rows × 18 columns
from sklearn import tree
clf = tree.DecisionTreeClassifier(max_features = 5, max_depth = 3)
X_one_hot = pd.get_dummies(X, drop_first=True)
clf = clf.fit(X_one_hot, y)
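# Rough check (sketch): cross-validated accuracy of the same shallow surrogate
# tree, reusing the cross_val_score / KFold imports from the top of the notebook.
cv_scores = cross_val_score(tree.DecisionTreeClassifier(max_features=5, max_depth=3),
                            X_one_hot, y, cv=KFold(n_splits=5, shuffle=True, random_state=0))
print("CV accuracy: %.3f +/- %.3f" % (cv_scores.mean(), cv_scores.std()))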
df[df.sex == 'male'].survived.value_counts()
0    382
1    106
Name: survived, dtype: int64
df[df.sex == 'female'].survived.value_counts()
1    217
0     76
Name: survived, dtype: int64
df.survived.value_counts()
0    458
1    323
Name: survived, dtype: int64
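# Equivalent view (sketch): survival rates by sex rather than raw counts
# (~0.74 for female, ~0.22 for male, from the counts above).
print(df.groupby('sex').survived.mean())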
#clf.classes_
fn = list(X_one_hot.columns)
cn = ['did not survive', 'survived']
#cn = ['survived', 'did not survive']
fig, axes = plt.subplots(nrows = 1,
ncols = 1,
figsize = (30,30))
#dpi=500)
tree.plot_tree(clf,
feature_names = fn,
class_names=cn,
filled = True, fontsize = 15)
#fig.savefig('imagename.png')
[tree.plot_tree output: decision tree rendered. Root split: sex_male <= 0.5 (781 samples, value=[458, 323]). The female branch (293 samples, class=survived) splits on embark_town_Southampton, then deck_B and class_Second; the male branch (488 samples, class=did not survive) splits on deck_E, then parch and fare <= 55.671.]
import dalex as dx
data = pd.read_csv("https://raw.githubusercontent.com/pbiecek/xai-happiness/main/happiness.csv", index_col=0)
data.head()
| | score | gdp_per_capita | social_support | healthy_life_expectancy | freedom_to_make_life_choices | generosity | perceptions_of_corruption |
|---|---|---|---|---|---|---|---|
| Afghanistan | 3.203 | 0.350 | 0.517 | 0.361 | 0.000 | 0.158 | 0.025 |
| Albania | 4.719 | 0.947 | 0.848 | 0.874 | 0.383 | 0.178 | 0.027 |
| Algeria | 5.211 | 1.002 | 1.160 | 0.785 | 0.086 | 0.073 | 0.114 |
| Argentina | 6.086 | 1.092 | 1.432 | 0.881 | 0.471 | 0.066 | 0.050 |
| Armenia | 4.559 | 0.850 | 1.055 | 0.815 | 0.283 | 0.095 | 0.064 |
X, y = data.drop('score', axis=1), data.score
n, p = X.shape
X
| | gdp_per_capita | social_support | healthy_life_expectancy | freedom_to_make_life_choices | generosity | perceptions_of_corruption |
|---|---|---|---|---|---|---|
| Afghanistan | 0.350 | 0.517 | 0.361 | 0.000 | 0.158 | 0.025 |
| Albania | 0.947 | 0.848 | 0.874 | 0.383 | 0.178 | 0.027 |
| Algeria | 1.002 | 1.160 | 0.785 | 0.086 | 0.073 | 0.114 |
| Argentina | 1.092 | 1.432 | 0.881 | 0.471 | 0.066 | 0.050 |
| Armenia | 0.850 | 1.055 | 0.815 | 0.283 | 0.095 | 0.064 |
| ... | ... | ... | ... | ... | ... | ... |
| Venezuela | 0.960 | 1.427 | 0.805 | 0.154 | 0.064 | 0.047 |
| Vietnam | 0.741 | 1.346 | 0.851 | 0.543 | 0.147 | 0.073 |
| Yemen | 0.287 | 1.163 | 0.463 | 0.143 | 0.108 | 0.077 |
| Zambia | 0.578 | 1.058 | 0.426 | 0.431 | 0.247 | 0.087 |
| Zimbabwe | 0.366 | 1.114 | 0.433 | 0.361 | 0.151 | 0.089 |
156 rows × 6 columns
y
Afghanistan 3.203
Albania 4.719
Algeria 5.211
Argentina 6.086
Armenia 4.559
...
Venezuela 4.707
Vietnam 5.175
Yemen 3.380
Zambia 4.107
Zimbabwe 3.663
Name: score, Length: 156, dtype: float64
#tf.random.set_seed(11)
normalizer = tf.keras.layers.experimental.preprocessing.Normalization(input_shape=[p,])
normalizer.adapt(X.to_numpy())
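# The Normalization layer learns per-feature mean/variance from adapt(); as a
# quick check (illustrative, output not shown in the original run), the adapted
# layer should map the features to roughly zero mean and unit variance.
X_scaled = normalizer(X.to_numpy()).numpy()
print(np.round(X_scaled.mean(axis=0), 3), np.round(X_scaled.std(axis=0), 3))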
model = tf.keras.Sequential([
normalizer,
tf.keras.Input(shape=(p,)),
tf.keras.layers.Dense(p*2, activation='relu'),
tf.keras.layers.Dense(p*3, activation='relu'),
tf.keras.layers.Dense(p*2, activation='relu'),
tf.keras.layers.Dense(p, activation='relu'),
tf.keras.layers.Dense(1, activation='linear')
])
model.compile(
optimizer=tf.keras.optimizers.Adam(0.001),
loss=tf.keras.losses.mae
)
model.fit(X, y, batch_size=int(n/10), epochs=2000, verbose=False)
WARNING:tensorflow:Please add `keras.layers.InputLayer` instead of `keras.Input` to Sequential model. `keras.Input` is intended to be used by Functional model.
<tensorflow.python.keras.callbacks.History at 0x1d3a2287340>
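# Quick check (sketch, value varies by run): the in-sample MAE the network was
# trained to minimize, for comparison with the explainer's residuals below.
print("Training MAE:", model.evaluate(X, y, verbose=0))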
#type(model)
#model.output_shape
explainer = dx.Explainer(model, X, y, label='happiness')
Preparation of a new explainer is initiated
  -> data              : 156 rows 6 cols
  -> target variable   : Parameter 'y' was a pandas.Series. Converted to a numpy.ndarray.
  -> target variable   : 156 values
  -> model_class       : tensorflow.python.keras.engine.sequential.Sequential (default)
  -> label             : happiness
  -> predict function  : <function yhat_tf_regression at 0x000001D1C4457790> will be used (default)
  -> predict function  : Accepts pandas.DataFrame and numpy.ndarray.
  -> predicted values  : min = 2.88, mean = 5.4, max = 7.56
  -> model type        : regression will be used (default)
  -> residual function : difference between y and yhat (default)
  -> residuals         : min = -0.63, mean = 0.0104, max = 0.697
  -> model_info        : package tensorflow

A new explainer has been created!
#explainer_new = dx.Explainer(model, X, y, label='happiness',
# predict_function = dx._explainer.yhat.yhat_tf_regression)
#explainer.predict_function
#dx._explainer.yhat.yhat_tf_regression(model, X)
#explainer.residual_function
#dx._explainer.checks.check_residual_function.residual_function
#def rf(_model, _data, _y):
# return _y - dx._explainer.yhat.yhat_tf_regression(model, X)
#rf(model, X, y)
explainer.model_performance()
| | mse | rmse | r2 | mae | mad |
|---|---|---|---|---|---|
| happiness | 0.025321 | 0.159127 | 0.979432 | 0.100341 | 0.05421 |
explainer.model_parts().plot()
explainer.model_profile().plot(variables=['social_support', 'healthy_life_expectancy',
'gdp_per_capita', 'freedom_to_make_life_choices'])
Calculating ceteris paribus: 100%|███████████████████████████████████████████████████████| 6/6 [00:03<00:00, 1.99it/s]
explainer.model_diagnostics().plot(variable='social_support', yvariable="abs_residuals", marker_size=5, line_width=3)
explainer.model_diagnostics().result
| | gdp_per_capita | social_support | healthy_life_expectancy | freedom_to_make_life_choices | generosity | perceptions_of_corruption | y | y_hat | residuals | abs_residuals | label | ids |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Afghanistan | 0.350 | 0.517 | 0.361 | 0.000 | 0.158 | 0.025 | 3.203 | 3.282040 | -0.079040 | 0.079040 | happiness | 1 |
| Albania | 0.947 | 0.848 | 0.874 | 0.383 | 0.178 | 0.027 | 4.719 | 4.730867 | -0.011867 | 0.011867 | happiness | 2 |
| Algeria | 1.002 | 1.160 | 0.785 | 0.086 | 0.073 | 0.114 | 5.211 | 5.171719 | 0.039281 | 0.039281 | happiness | 3 |
| Argentina | 1.092 | 1.432 | 0.881 | 0.471 | 0.066 | 0.050 | 6.086 | 6.114773 | -0.028773 | 0.028773 | happiness | 4 |
| Armenia | 0.850 | 1.055 | 0.815 | 0.283 | 0.095 | 0.064 | 4.559 | 4.554076 | 0.004924 | 0.004924 | happiness | 5 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| Venezuela | 0.960 | 1.427 | 0.805 | 0.154 | 0.064 | 0.047 | 4.707 | 4.672098 | 0.034902 | 0.034902 | happiness | 152 |
| Vietnam | 0.741 | 1.346 | 0.851 | 0.543 | 0.147 | 0.073 | 5.175 | 5.805411 | -0.630411 | 0.630411 | happiness | 153 |
| Yemen | 0.287 | 1.163 | 0.463 | 0.143 | 0.108 | 0.077 | 3.380 | 3.471087 | -0.091087 | 0.091087 | happiness | 154 |
| Zambia | 0.578 | 1.058 | 0.426 | 0.431 | 0.247 | 0.087 | 4.107 | 4.155517 | -0.048517 | 0.048517 | happiness | 155 |
| Zimbabwe | 0.366 | 1.114 | 0.433 | 0.361 | 0.151 | 0.089 | 3.663 | 3.833301 | -0.170301 | 0.170301 | happiness | 156 |
156 rows × 12 columns
explainer.predict_parts(X.loc['Poland'], type='shap').plot()
Exception ignored in: <function CapturableResource.__del__ at 0x000001D1E4C61EE0>
Traceback (most recent call last):
File "C:\Users\deepl\anaconda3\lib\site-packages\tensorflow\python\training\tracking\tracking.py", line 277, in __del__
self._destroy_resource()
File "C:\Users\deepl\anaconda3\lib\site-packages\tensorflow\python\eager\def_function.py", line 889, in __call__
result = self._call(*args, **kwds)
File "C:\Users\deepl\anaconda3\lib\site-packages\tensorflow\python\eager\def_function.py", line 924, in _call
results = self._stateful_fn(*args, **kwds)
File "C:\Users\deepl\anaconda3\lib\site-packages\tensorflow\python\eager\function.py", line 3022, in __call__
filtered_flat_args) = self._maybe_define_function(args, kwargs)
File "C:\Users\deepl\anaconda3\lib\site-packages\tensorflow\python\eager\function.py", line 3444, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "C:\Users\deepl\anaconda3\lib\site-packages\tensorflow\python\eager\function.py", line 3279, in _create_graph_function
func_graph_module.func_graph_from_py_func(
File "C:\Users\deepl\anaconda3\lib\site-packages\tensorflow\python\framework\func_graph.py", line 999, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "C:\Users\deepl\anaconda3\lib\site-packages\tensorflow\python\eager\def_function.py", line 672, in wrapped_fn
out = weak_wrapped_fn().__wrapped__(*args, **kwds)
AttributeError: 'NoneType' object has no attribute '__wrapped__'
pp_list = []
for country in ['Afghanistan', 'Belgium', 'China', 'Denmark', 'Ethiopia']:
pp = explainer.predict_parts(X.loc[country], type='break_down')
pp.result.label = country
pp_list += [pp]
pp_list[0].plot(pp_list[1::], min_max=[2.5, 8.5])
lime_explanation = explainer.predict_surrogate(X.loc['United States'], mode='regression')
lime_explanation.plot()
lime_explanation.result
| | variable | effect |
|---|---|---|
| 0 | gdp_per_capita > 1.23 | 0.553652 |
| 1 | social_support > 1.45 | 0.467272 |
| 2 | 0.09 < perceptions_of_corruption <= 0.14 | -0.264506 |
| 3 | generosity > 0.25 | -0.087135 |
| 4 | 0.79 < healthy_life_expectancy <= 0.88 | 0.065358 |
| 5 | 0.42 < freedom_to_make_life_choices <= 0.51 | 0.045682 |
surrogate_model = explainer.model_surrogate(max_vars=4, max_depth=3)
surrogate_model.performance
| | mse | rmse | r2 | mae | mad |
|---|---|---|---|---|---|
| DecisionTreeRegressor | 0.195621 | 0.442291 | 0.820771 | 0.353305 | 0.286188 |
surrogate_model.plot()